"""Utilities for translating ORF detection
"""
# Part of ribotricer software
#
# Copyright (C) 2019 Saket Choudhary, Wenzheng Li, and Andrew D Smith
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

import sys
from .interval import Interval


class ORF:
    """Candidate ORF made of one or more intervals on a single transcript.

    Instances are normally created through the alternate constructors
    ``from_string`` (one line of the ribotricer index file) or
    ``from_tracks`` (a list of GTF tracks).
    """

    # Every attribute ``from_tracks`` reads from a track.  ``start`` and
    # ``end`` are listed too, so that a track lacking coordinates is
    # reported instead of being silently dropped.
    _REQUIRED_TRACK_ATTRIBUTES = (
        "transcript_id",
        "transcript_type",
        "gene_id",
        "gene_name",
        "gene_type",
        "chrom",
        "strand",
        "start",
        "end",
    )

    def __init__(
        self,
        category,
        transcript_id,
        transcript_type,
        gene_id,
        gene_name,
        gene_type,
        chrom,
        strand,
        intervals,
        seq="",
        leader="",
        trailer="",
    ):
        """
        Parameters
        ----------
        category: ORF category label, stored as given
        transcript_id, transcript_type: identity of the parent transcript
        gene_id, gene_name, gene_type: identity of the parent gene
        chrom, strand: location of the ORF
        intervals: non-empty iterable of objects exposing ``start`` and
                   ``end``; stored sorted by ``start``
        seq: sequence of the ORF (may be only its start codon)
        leader, trailer: flanking sequences, stored as given

        The ORF id ``oid`` is built as
        ``<transcript_id>_<start>_<end>_<length>`` where start/end are
        taken from the first/last sorted interval and length is the sum
        of ``end - start + 1`` over all intervals.
        """
        self.category = category
        self.tid = transcript_id
        self.ttype = transcript_type
        self.gid = gene_id
        self.gname = gene_name
        self.gtype = gene_type
        self.chrom = chrom
        self.strand = strand
        self.intervals = sorted(intervals, key=lambda x: x.start)
        start = self.intervals[0].start
        end = self.intervals[-1].end
        self.oid = "{}_{}_{}_{}".format(
            transcript_id,
            start,
            end,
            sum(x.end - x.start + 1 for x in self.intervals),
        )
        self.seq = seq
        self.leader = leader
        self.trailer = trailer

    @property
    def start_codon(self):
        """Return the first 3 bases of ``seq``, or None if it is shorter."""
        if len(self.seq) < 3:
            return None
        return self.seq[:3]

    @classmethod
    def from_string(cls, line):
        """Build an ORF from one line of the ribotricer index file.

        Parameters
        ----------
        line: string
              line of the ribotricer index file generated by prepare_orfs;
              11 tab-separated columns: ORF id, category, transcript id,
              transcript type, gene id, gene name, gene type, chrom,
              strand, start codon and comma-separated ``start-end``
              coordinates

        Returns
        -------
        ORF, or None when ``line`` is empty.

        This method uses a fail-fast strategy and hence has multiple
        exits: an empty line returns None, and a line with an unexpected
        number of columns terminates the program through ``sys.exit``.
        """
        if not line:
            print("annotation line cannot be empty")
            return None
        fields = line.split("\t")
        if len(fields) != 11:
            sys.exit(
                "{}\n{}".format(
                    "Error: unexpected number of columns found for index file",
                    "please run ribotricer prepare-orfs to regenerate",
                )
            )
        # The stored ORF id (first column) is not needed: __init__
        # recomputes it from the transcript id and the intervals.
        (
            _oid,
            category,
            tid,
            ttype,
            gid,
            gname,
            gtype,
            chrom,
            strand,
            start_codon,
            coordinate,
        ) = fields
        intervals = []
        for group in coordinate.split(","):
            start, end = group.split("-")
            intervals.append(Interval(chrom, int(start), int(end), strand))
        return cls(
            category,
            tid,
            ttype,
            gid,
            gname,
            gtype,
            chrom,
            strand,
            intervals,
            # Only the start codon is stored in the index file.
            seq=start_codon,
        )

    @classmethod
    def from_tracks(cls, tracks, category, seq="", leader="", trailer=""):
        """Build an ORF from the tracks that make it up.

        Parameters
        ----------
        tracks: list of GTFTrack
                each track must expose transcript_id, transcript_type,
                gene_id, gene_name, gene_type, chrom, strand, start
                and end
        category: ORF category label
        seq, leader, trailer: sequences forwarded to the constructor

        Returns
        -------
        ORF, or None when ``tracks`` is empty, when a track lacks a
        required attribute, or when the tracks disagree on transcript,
        gene, chrom or strand.

        This method uses a fail-fast strategy and hence has multiple
        returns.
        """
        if not tracks:
            return None
        intervals = []
        tid = set()
        ttype = set()
        gid = set()
        gname = set()
        gtype = set()
        chrom = set()
        strand = set()
        for track in tracks:
            # Validate before touching any accumulator so that a faulty
            # track can never be partially recorded.
            for attribute in cls._REQUIRED_TRACK_ATTRIBUTES:
                if not hasattr(track, attribute):
                    print('missing attribute "{}" in {}'.format(attribute, track))
                    return None
            tid.add(track.transcript_id)
            ttype.add(track.transcript_type)
            gid.add(track.gene_id)
            gname.add(track.gene_name)
            gtype.add(track.gene_type)
            chrom.add(track.chrom)
            strand.add(track.strand)
            intervals.append(
                Interval(track.chrom, track.start, track.end, track.strand)
            )
        # All tracks of one ORF must agree on every identity field.
        identity_fields = (tid, ttype, gid, gname, gtype, chrom, strand)
        if any(len(values) != 1 for values in identity_fields):
            print("inconsistent tracks for ORF: {}".format(track))
            return None
        return cls(
            category,
            next(iter(tid)),
            next(iter(ttype)),
            next(iter(gid)),
            next(iter(gname)),
            next(iter(gtype)),
            next(iter(chrom)),
            next(iter(strand)),
            intervals,
            seq,
            leader,
            trailer,
        )
